package com.xbai.spark.recall.engine.training

import org.apache.spark.sql.{DataFrame, SparkSession}

/**
  * Training entry point for the user-based collaborative-filtering (UserCF) recall model.
  *
  * @author xbai
  * @Date 2021/1/14
  */
class UserCFTraining {

  /**
    * Trains the user-recall model: derives the user-to-user similarity table from the
    * ratings, overwrites the `userSimilar` table with it, prints a 3-row preview and
    * returns it.
    *
    * @param data  input ratings; documented by the original author as userId-itemId-rating
    * @param spark Spark session, forwarded unchanged to the `UserCF` helper methods
    * @return the user similarity DataFrame (documented as userId-userId1-usersim).
    *         The returned DataFrame is cached; callers that no longer need it may call
    *         `unpersist()` on it.
    */
  def trainingEngine(data: DataFrame, spark: SparkSession): DataFrame = {
    val userCF = new UserCF
    // NOTE(review): judging by the helper names these are the per-user rating norm and the
    // per-user-pair co-rated product sum of a cosine similarity -- confirm against UserCF.
    val userSqrtRatingSum: DataFrame = userCF.getUserSqrtRatingSum(data, spark)
    val userSameItem: DataFrame = userCF.getUserSameItemProductSum(data, spark)
    // Two actions (saveAsTable and show) run on this result below, and the caller gets it
    // back as well. Without caching, every action would re-execute the whole similarity
    // computation from `data`, so mark it for caching before the first action.
    val userSimilar: DataFrame =
      userCF.userSimilarity(userSqrtRatingSum, userSameItem, spark).cache()
    // Training can be run e.g. once a week, with the result stored in a table.
    userSimilar.write.mode("overwrite").saveAsTable("userSimilar")
    println("========== userSimilar ==========")
    userSimilar.show(3)
    userSimilar
  }
}
